Initialization


In [ ]:
import sys
import cPickle

import numpy
import scipy

from sklearn.manifold import TSNE

import matplotlib.pyplot as plt

import seaborn as sns

In [ ]:
%matplotlib inline

In [ ]:
# some seaborn initialization for prettier plots
def init_seaborn():
    """Configure seaborn's global appearance for the plots in this notebook.

    Selects the 'darkgrid' style and the 'muted' palette, then switches to
    the "notebook" context with a font scale of 1.5 and a line width of 2.5.
    Returns nothing; the effect is on seaborn's global settings.
    """
    context_rc = {"lines.linewidth": 2.5}
    sns.set_style('darkgrid')
    sns.set_palette('muted')
    sns.set_context("notebook", rc=context_rc, font_scale=1.5)

# Seed handed to TSNE as random_state further down.
RS = 20151012
# Apply the seaborn styling defined in init_seaborn() before any plotting.
init_seaborn()

Reading data


In [ ]:
# read vectors, one word per row

def read_data(fn):
    """Load a pickled ``{label: vector}`` mapping as a row index plus a matrix.

    Parameters
    ----------
    fn : str
        Path to a pickle file holding a dict-like object.

    Returns
    -------
    labels : dict
        Maps every key of the pickled mapping to the row index of its vector.
    vectors : numpy.ndarray
        ``numpy.array`` built from the mapping's values in iteration order,
        so ``vectors[labels[k]]`` is the value that was stored under ``k``.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # Pickles are binary data, so open in 'rb'; the `with` block guarantees
    # the handle is closed even if loading fails.
    with open(fn, 'rb') as fh:
        d = cPickle.load(fh)
    labels, vectors = {}, []
    # items() iterates in the same order as iteritems() and also exists on
    # Python 3 dicts.
    for k, v in d.items():
        labels[k] = len(labels)  # next free row index
        vectors.append(v)
    return labels, numpy.array(vectors)

In [ ]:
labels, vectors = read_data("all_feat_vectors")
print 'nal', 'ies', scipy.spatial.distance.cosine(vectors[labels['nal']], vectors[labels['ies']])

Training t-SNE


In [ ]:
# Build the t-SNE model and project the vectors with it.
# NOTE(review): the metric is passed as a Python callable; a string metric
# such as 'cosine' may be faster, but could shift the result slightly --
# confirm against the scikit-learn docs before changing.
tsne = TSNE(
    metric=scipy.spatial.distance.cosine,
    perplexity=20,
    learning_rate=200,
    random_state=RS,
    verbose=2,
)
proj = tsne.fit_transform(vectors)

In [ ]:
# Display the dimensions of the projection returned by fit_transform.
proj.shape

In [ ]:
# Display the projected row for the label 'lly'.
proj[labels['lly']]

Plotting


In [ ]:
# Labels to call out on top of the scatter plot.
to_annotate = ['ion', 'ity', 'ism', 'tor', 'age',
               'ncy', 'hip', 'ium', 'ney', 'cer',
               'ked', 'ged', 'red', 'ied', 'ced',
               'tic', 'ful', 'ary', 'cal', 'lar']

# Scatter the first two columns of the projection on a square figure.
f = plt.figure(figsize=(12, 12))
ax = plt.subplot(aspect='equal')
sc = ax.scatter(proj[:, 0], proj[:, 1], s=40, lw=0)

# NOTE(review): axis('tight') runs after the explicit limits below and may
# override them -- confirm which of the two is actually intended.
ax.set_xlim(-25, 25)
ax.set_ylim(-25, 25)
ax.axis('off')
ax.axis('tight')

# Write each selected label at its projected position.
for label in to_annotate:
    ax.annotate(label, xy=proj[labels[label]], color='g', fontsize=20)
plt.show()